import tensorflow as tf
import gym

def run_random_episode(env, max_steps=1000):
    """Play a single episode with random actions, rendering every frame.

    The environment is driven through the classic Gym interface: ``reset()``
    returns the first observation and ``step(action)`` returns the 4-tuple
    ``(observation, reward, done, info)``.

    Args:
        env: Environment object providing ``reset()``, ``render()``,
            ``action_space.sample()`` and ``step(action)``.
        max_steps: Upper bound on the number of ``env.step`` calls.

    Returns:
        The number of steps actually taken (never more than ``max_steps``).
    """
    state = env.reset()
    steps_taken = 0  # stays 0 when max_steps < 1 and the loop never runs
    for steps_taken in range(1, max_steps + 1):
        env.render()
        print(state.shape)
        action = env.action_space.sample()
        # Keep the newest observation so the shape printed on the next
        # iteration describes the current frame rather than the reset frame.
        state, _reward, done, _info = env.step(action)

        if done:  # the game is over: stop stepping this episode
            # The counter is 1-based, so it already equals the step count.
            print("Episode finished after {} steps".format(steps_taken))
            break
    return steps_taken


if __name__ == '__main__':
    env = gym.make('Breakout-v0')
    try:
        print(env.action_space.n, env.observation_space.shape)
        print(env.get_action_meanings())
        run_random_episode(env, max_steps=1000)
    finally:
        # Always close the environment, even if the episode loop raises.
        env.close()
